#Loading libraries (messages suppressed to keep the rendered report clean)
suppressMessages({
  library(haven)    # read_sav()
  library(dplyr)
  library(psych)    # pairs.panels(), describe()
  library(tidyr)
  library(ggplot2)
  library(lcmm)     # hlme(), gridsearch(), summarytable()
  library(tidyLPA)
})
#Loading the raw SPSS data
d_raw <- read_sav("Hall.Lewis.Ellsworth(2018)_LongitudinalClimateChangeBeliefsandBehavior_OSF.sav")
#Dataset w/ variables of interest: the seven 'importance' items (T1-T7).
#FIX: the original select() had a trailing comma inside c(), which tidyselect
#rejects/handles fragilely; the names are now built programmatically so the
#selection generalizes if more waves are added.
d <- d_raw |>
  select(all_of(paste0("T", 1:7, "_important")))
#Getting the data w/out any NAs (listwise deletion across all 7 waves)
d_nomiss <- d[complete.cases(d), ]
#Save the data
#write.csv(d_nomiss, "d_nomiss.csv", row.names = FALSE)
#Quick check corrs and dist
#Scatterplot matrix (psych): histograms on the diagonal, scatterplots below,
#correlations above, across the seven waves
pairs.panels(d_nomiss)
#Per-wave descriptives (psych::describe); the '##' lines below are the
#recorded console output from the original run (n = 292 complete cases,
#responses on a 1-7 scale)
describe(d_nomiss$T1_important)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 292 5 1.7 5 5.18 1.48 1 7 6 -0.74 -0.36 0.1
describe(d_nomiss$T2_important)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 292 4.92 1.73 5 5.08 1.48 1 7 6 -0.66 -0.56 0.1
describe(d_nomiss$T3_important)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 292 4.76 1.85 5 4.91 1.48 1 7 6 -0.56 -0.78 0.11
describe(d_nomiss$T4_important)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 292 4.67 1.8 5 4.78 1.48 1 7 6 -0.48 -0.86 0.11
describe(d_nomiss$T5_important)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 292 4.72 1.83 5 4.85 1.48 1 7 6 -0.58 -0.83 0.11
describe(d_nomiss$T6_important)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 292 4.81 1.87 5 4.98 1.48 1 7 6 -0.61 -0.74 0.11
describe(d_nomiss$T7_important)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 292 4.92 1.81 5 5.09 1.48 1 7 6 -0.68 -0.62 0.11
#Prepping the data
#Add a participant id (row number in the wide data)
d_nomiss <- d_nomiss |> mutate(id = row_number())
#Reshape wide -> long: a name like "T3_important" is split by the pattern
#into time = "T3" and a value column named after the measure (".value")
d_long <- d_nomiss |>
  pivot_longer(cols = -id,
               names_to = c("time", ".value"),
               names_pattern = "(T\\d+)_(.*)")
str(d_long)
## tibble [2,044 × 3] (S3: tbl_df/tbl/data.frame)
## $ id : int [1:2044] 1 1 1 1 1 1 1 2 2 2 ...
## $ time : chr [1:2044] "T1" "T2" "T3" "T4" ...
## $ important: num [1:2044] 2 2 4 4 3 4 4 6 4 5 ...
#To save the dataset in the long format
#write.csv(d_long, "d_long.csv", row.names = FALSE)
#Important across time distribution
#Histogram of the 7-point importance item, one panel per wave
hist_imp <- ggplot(d_long, aes(x = important)) +
  geom_histogram(bins = 7, fill = "skyblue", color = "black") +
  facet_wrap(~ time, scales = "free", ncol = 4) +
  labs(title = "", x = "Importance CC Level", y = "Frequency") +
  theme_classic()
print(hist_imp)
#Spaghetti plot for Important
#Converting the Time variable into the factor (orders the x-axis T1..T7)
d_long$time <- factor(d_long$time, levels = c("T1", "T2", "T3", "T4", "T5", "T6", "T7"))
#Subset 100 random participants for better visibility of the plot.
#FIX: seed the RNG so the plotted subset is reproducible across runs
#(the original sample() call was unseeded; 2002 matches the seed used in
#the model sections below).
set.seed(2002)
sampled_ids <- sample(unique(d_long$id), 100)
#Plotting the random subset
ggplot(d_long |> filter(id %in% sampled_ids),
       aes(x = time, y = important, group = id, color = as.factor(id))) +
  geom_line(alpha = 0.4) +
  labs(title = "", x = "Time", y = "Importance Level", color = "ID") +
  theme_classic()
#Plotting the subset with the LOESS curve for the overall trend
ggplot(d_long |> filter(id %in% sampled_ids),
       aes(x = time, y = important, group = id, color = as.factor(id))) +
  geom_line(alpha = 0.4) +
  #FIX: the `size` aesthetic for lines was deprecated in ggplot2 3.4.0
  #(the deprecation warning was recorded in the original run); use
  #`linewidth` instead
  geom_smooth(method = "loess", se = FALSE, color = "grey23", span = 0.75,
              linewidth = .5) +
  labs(title = "Sample of Individual Trajectories for 'Important' Over Time",
       x = "Time", y = "Importance Level") +
  scale_y_continuous(limits = c(1, 7), breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  theme_minimal() +
  theme(legend.position = "none")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 590 rows containing missing values or values outside the scale range
## (`geom_smooth()`).
#Average Importance at each time point across participants
average_importance <- d_long |>
  group_by(time) |>
  summarise(average_important = mean(important, na.rm = TRUE))
#Plotting the average trend; group = 1 so ggplot draws one line through all
#time points rather than one per factor level
ggplot(average_importance, aes(x = time, y = average_important, group = 1)) +
  #FIX: `size` for lines was deprecated in ggplot2 3.4.0 -> `linewidth`
  geom_line(color = "blue", linewidth = 1.5) +
  geom_point(color = "red", size = 3) + #`size` remains correct for points
  labs(
    title = "Average Importance Over Time",
    x = "Time",
    y = "Average Importance"
  ) +
  scale_y_continuous(limits = c(1, 7), breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  theme_minimal()
#Individual and Average overall trajectories
ggplot(d_long |> filter(id %in% sampled_ids),
       aes(x = time, y = important, group = id, color = as.factor(id))) +
  geom_line(alpha = 0.4) +
  #Overall average trend line drawn on top of the individual trajectories.
  #FIX: `size` for lines was deprecated in ggplot2 3.4.0 -> `linewidth`
  geom_line(data = average_importance,
            aes(x = time, y = average_important, group = 1),
            color = "black", linewidth = 1.5) +
  labs(title = "", x = "Time point", y = "Importance Level") +
  theme_classic() +
  theme(legend.position = "none")
#Trajectories for those in Low vs High group of Importance at T1
#Carry each participant's T1 importance score onto all of their rows
d_long <- d_long |>
  group_by(id) |>
  mutate(initial_important = important[time == "T1"]) |>
  ungroup()
#Median split: cutoff is the median T1 score
cutoff <- median(d_long$initial_important, na.rm = TRUE)
#The grouping variable (>= median -> "Above", otherwise "Below")
d_long <- d_long |>
  mutate(group_at_T1 = ifelse(initial_important >= cutoff, "Above", "Below"))
#How many observations/% are above or below the median
#(renamed from `t`/`t_p`: `t` shadows base::t())
grp_counts <- table(d_long$group_at_T1)
grp_pct <- grp_counts / sum(grp_counts) * 100
print(grp_pct)
##
## Above Below
## 68.83562 31.16438
#Plotting with group separation: one facet per median group, individual
#trajectories colored by group, plus a black LOESS trend per group
ggplot(d_long,
       aes(x = time, y = important, group = id, color = group_at_T1)) +
  geom_line(alpha = 0.4) +
  facet_wrap(~group_at_T1, scales = "free_y") +
  geom_smooth(aes(group = group_at_T1),
              method = "loess", se = FALSE, color = "black") +
  labs(title = "",
       x = "Time Point", y = "Importance Level", color = "Median Group") +
  scale_y_continuous(limits = c(1, 7), breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  theme_classic() +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'
#Fitting a simple GMM step by step. Linear
#One-class linear mixed model: wave (categorical) fixed effects plus
#per-subject random intercept and wave effects
basic_model <- hlme(
  fixed = important ~ time,
  random = ~ time,
  subject = "id",
  data = d_long
)
summary(basic_model)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ time, random = ~time, subject = "id",
## data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 1
## Number of parameters: 36
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 13
## Convergence criteria: parameters= 8.5e-07
## : likelihood= 1.9e-05
## : second derivatives= 3.5e-11
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2761
## AIC: 5594
## BIC: 5726.36
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept 5.00000 0.09926 50.373 0.00000
## timeT2 -0.08219 0.05691 -1.444 0.14868
## timeT3 -0.24315 0.06917 -3.515 0.00044
## timeT4 -0.32877 0.07123 -4.615 0.00000
## timeT5 -0.27740 0.07446 -3.725 0.00020
## timeT6 -0.19178 0.07514 -2.552 0.01071
## timeT7 -0.08219 0.07347 -1.119 0.26325
##
##
## Variance-covariance matrix of the random-effects:
## intercept timeT2 timeT3 timeT4 timeT5 timeT6 timeT7
## intercept 2.57959
## timeT2 -0.11726 0.35105
## timeT3 -0.14123 0.33358 0.80211
## timeT4 -0.27479 0.27859 0.74622 0.88671
## timeT5 -0.27822 0.22117 0.69707 0.83770 1.02401
## timeT6 -0.22000 0.23848 0.68022 0.73571 0.90995 1.05390
## timeT7 -0.29534 0.22352 0.66577 0.75120 0.89925 0.91314 0.98119
##
## coef Se
## Residual standard error: 0.54509 0.55575
#Latent class models (LCGA: class-specific fixed effects, no random effects)
#FIX: seed the RNG before gridsearch(), whose random starting values are
#otherwise irreproducible (the GMM sections below already set this seed;
#the recorded output predates the seed).
set.seed(2002)
lcga1 <- hlme(important ~ time, subject = "id", ng = 1, data = d_long)
lcga2 <- gridsearch(rep = 100, maxiter = 10, minit = lcga1,
                    m = hlme(important ~ time, subject = "id",
                             ng = 2, data = d_long, mixture = ~ time))
lcga3 <- gridsearch(rep = 100, maxiter = 10, minit = lcga1,
                    m = hlme(important ~ time, subject = "id",
                             ng = 3, data = d_long, mixture = ~ time))
#Compare the models with diff numbers of clusters
summarytable(lcga1, lcga2, lcga3)
## G loglik npm BIC %class1 %class2 %class3
## lcga1 1 -4097.333 8 8240.079 100.00000
## lcga2 2 -3235.341 16 6561.509 31.16438 68.83562
## lcga3 3 -2989.113 24 6114.468 28.42466 47.60274 23.9726
#Detailed estimates for the 3-class LCGA solution (lowest BIC of the three
#LCGA fits per the summarytable above)
summary(lcga3)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ time, mixture = ~time, subject = "id",
## ng = 3, data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 3
## Number of parameters: 24
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 1
## Convergence criteria: parameters= 2.9e-13
## : likelihood= 1.4e-11
## : second derivatives= 1.3e-15
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2989.11
## AIC: 6026.23
## BIC: 6114.47
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the class-membership model:
## (the class of reference is the last class)
##
## coef Se Wald p-value
## intercept class1 0.15621 0.17696 0.883 0.37737
## intercept class2 0.72535 0.17010 4.264 0.00002
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept class1 4.58803 0.13915 32.971 0.00000
## intercept class2 6.24319 0.08339 74.865 0.00000
## intercept class3 2.91385 0.12056 24.170 0.00000
## timeT2 class1 0.12065 0.15118 0.798 0.42485
## timeT2 class2 -0.07128 0.10878 -0.655 0.51232
## timeT2 class3 -0.34186 0.15867 -2.155 0.03119
## timeT3 class1 -0.17737 0.15229 -1.165 0.24414
## timeT3 class2 -0.02540 0.10893 -0.233 0.81561
## timeT3 class3 -0.76980 0.16476 -4.672 0.00000
## timeT4 class1 -0.30389 0.15034 -2.021 0.04325
## timeT4 class2 -0.15869 0.10865 -1.461 0.14413
## timeT4 class3 -0.70914 0.15839 -4.477 0.00001
## timeT5 class1 -0.19358 0.15385 -1.258 0.20832
## timeT5 class2 -0.11146 0.10901 -1.022 0.30656
## timeT5 class3 -0.71812 0.15726 -4.567 0.00000
## timeT6 class1 -0.04687 0.15753 -0.298 0.76607
## timeT6 class2 0.01133 0.10790 0.105 0.91641
## timeT6 class3 -0.78070 0.15958 -4.892 0.00000
## timeT7 class1 0.06349 0.15721 0.404 0.68633
## timeT7 class2 0.06633 0.10939 0.606 0.54429
## timeT7 class3 -0.55926 0.15765 -3.548 0.00039
##
## coef Se
## Residual standard error: 0.90905 0.01443
#Fit GMM w/ only Random intercept for Importance
set.seed(2002)
#The fits below are commented out (slow to run); they were fitted once,
#saved with save(), and are re-loaded from the .Rdata files further down.
#gmm1 <- hlme(important ~ time, subject = "id", random = ~1,
#ng = 1, data = d_long)
#gmm2 <- gridsearch(rep = 100, maxiter = 10, minit = gmm1,
# hlme(important ~ time, subject = "id", random = ~1,
#ng = 2, data = d_long,
# mixture = ~ time, nwg = TRUE))
#gmm3 <- gridsearch(rep = 100, maxiter = 10, minit = gmm1,
# hlme(important ~ time, subject = "id",
#random = ~1, ng = 3, data = d_long,
# mixture = ~ time, nwg = TRUE))
#To save the outputs
#gmm1, gmm2, gmm3
#save(gmm1, file = 'gmm1.Rdata')
#save(gmm2, file = 'gmm2.Rdata')
#save(gmm3, file = 'gmm3.Rdata')
#Load the previously fitted models (files must exist in the working dir)
load('gmm1.Rdata')
load('gmm2.Rdata')
load('gmm3.Rdata')
#Compare the models with diff numbers of clusters
summarytable(gmm1, gmm2, gmm3)
## G loglik npm BIC %class1 %class2 %class3
## gmm1 1 -2869.338 9 5789.767 100.00000
## gmm2 2 -2806.669 18 5715.519 68.49315 31.506849
## gmm3 3 -2750.968 27 5655.208 68.83562 1.712329 29.45205
#Fit GMM w/ Random intercept and Slopes for Importance
set.seed(2002)
#Fits commented out (slow); results re-loaded from .Rdata files below.
#gmm1_2 <- hlme(important ~ time, subject = "id", random = ~1 + time, ng = 1, data = d_long)
#gmm2_2 <- gridsearch(rep = 50, maxiter = 10, minit = gmm1_2,
# hlme(important ~ time, subject = "id", random = ~1 + time, ng = 2,
# data = d_long, mixture = ~ time, nwg = TRUE))
#gmm3_2 <- gridsearch(rep = 50, maxiter = 10, minit = gmm1_2,
# hlme(important ~ time, subject = "id", random = ~1 + time, ng = 3,
# data = d_long, mixture = ~ time, nwg = TRUE))
#To save the outputs
#save(gmm1_2, file = 'gmm1_2.Rdata')
#save(gmm2_2, file = 'gmm2_2.Rdata')
#save(gmm3_2, file = 'gmm3_2.Rdata')
#Load the previously fitted models
load('gmm1_2.Rdata')
load('gmm2_2.Rdata')
load('gmm3_2.Rdata')
#Compare the models with diff numbers of clusters
summarytable(gmm1_2, gmm2_2, gmm3_2)
## G loglik npm BIC %class1 %class2 %class3
## gmm1_2 1 -2761.001 36 5726.365 100.00000
## gmm2_2 2 -2577.978 45 5411.411 42.12329 57.87671
## gmm3_2 3 -2298.208 54 4902.960 14.72603 10.61644 74.65753
#Detailed estimates for the 3-class random-intercept-and-slope GMM
#(lowest BIC of the three fits per the summarytable above)
summary(gmm3_2)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ time, mixture = ~time, random = ~1 +
## time, subject = "id", ng = 3, nwg = TRUE, data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 3
## Number of parameters: 54
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 22
## Convergence criteria: parameters= 2.7e-06
## : likelihood= 4e-06
## : second derivatives= 2.4e-11
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2298.21
## AIC: 4704.42
## BIC: 4902.96
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the class-membership model:
## (the class of reference is the last class)
##
## coef Se Wald p-value
## intercept class1 -1.36400 0.22930 -5.949 0.00000
## intercept class2 -1.90302 0.19563 -9.728 0.00000
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept class1 4.41382 0.78535 5.620 0.00000
## intercept class2 6.74202 0.02998 224.896 0.00000
## intercept class3 4.89008 0.19009 25.725 0.00000
## timeT2 class1 -0.06770 0.24545 -0.276 0.78269
## timeT2 class2 0.00000 0.00820 0.000 0.99999
## timeT2 class3 -0.09815 0.05961 -1.647 0.09964
## timeT3 class1 -0.62914 0.28077 -2.241 0.02504
## timeT3 class2 0.00000 0.00913 0.000 0.99999
## timeT3 class3 -0.18074 0.06913 -2.614 0.00894
## timeT4 class1 -0.59280 0.27872 -2.127 0.03343
## timeT4 class2 0.00000 0.00912 0.000 0.99999
## timeT4 class3 -0.31029 0.06928 -4.479 0.00001
## timeT5 class1 -0.47067 0.29564 -1.592 0.11138
## timeT5 class2 0.00000 0.00941 0.000 0.99999
## timeT5 class3 -0.26935 0.07342 -3.668 0.00024
## timeT6 class1 -0.30469 0.29992 -1.016 0.30967
## timeT6 class2 0.00000 0.00974 0.000 0.99999
## timeT6 class3 -0.19151 0.07365 -2.600 0.00932
## timeT7 class1 -0.22251 0.28724 -0.775 0.43855
## timeT7 class2 0.00000 0.00934 0.000 0.99999
## timeT7 class3 -0.05858 0.07083 -0.827 0.40824
##
##
## Variance-covariance matrix of the random-effects:
## intercept timeT2 timeT3 timeT4 timeT5 timeT6 timeT7
## intercept 7.32417
## timeT2 -0.24690 0.60375
## timeT3 -0.23270 0.40827 0.81874
## timeT4 -0.28863 0.36215 0.58622 0.80842
## timeT5 -0.27353 0.32922 0.56029 0.59100 0.89436
## timeT6 -0.27033 0.34512 0.54962 0.52689 0.65725 0.94277
## timeT7 -0.27184 0.35428 0.54083 0.57175 0.64208 0.65025 0.8492
##
## coef Se
## Proportional coefficient class1 2.08836 0.09973
## Proportional coefficient class2 0.06172 0.00589
## Residual standard error: 0.00000 0.00244
#Simple GMM. Non-linear
#Numeric time (1..7), extracted from the "T#" labels, for polynomial terms
d_long$time_numeric <- as.numeric(sub("T", "", d_long$time))
#One-class quadratic growth model (orthogonal polynomials via poly())
nl_model <- hlme(
  fixed = important ~ poly(time_numeric, 2),
  random = ~ poly(time_numeric, 2),
  subject = "id",
  data = d_long
)
summary(nl_model)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 2), random = ~poly(time_numeric,
## 2), subject = "id", data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 1
## Number of parameters: 10
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 15
## Convergence criteria: parameters= 7.3e-07
## : likelihood= 2e-07
## : second derivatives= 1.3e-14
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2787.63
## AIC: 5595.27
## BIC: 5632.04
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept 4.82779 0.09651 50.024 0.00000
## poly(...)1 -1.61466 1.07469 -1.502 0.13298
## poly(...)2 4.59728 0.79935 5.751 0.00000
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2
## intercept 2.65700
## poly(...)1 5.13786 209.07143
## poly(...)2 -3.76909 -67.54998 58.40069
##
## coef Se
## Residual standard error: 0.66255 0.01371
#Cubic polynomial growth model (one class)
cubic_model <- hlme(
  fixed = important ~ poly(time_numeric, 3),
  random = ~ poly(time_numeric, 3),
  subject = "id",
  data = d_long
)
summary(cubic_model)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 3), random = ~poly(time_numeric,
## 3), subject = "id", data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 1
## Number of parameters: 15
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 26
## Convergence criteria: parameters= 3.4e-06
## : likelihood= 2.5e-06
## : second derivatives= 5.5e-06
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2778.25
## AIC: 5586.51
## BIC: 5641.66
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept 4.82779 0.09651 50.024 0.00000
## poly(...)1 -1.61469 1.07592 -1.501 0.13342
## poly(...)2 4.59730 0.80067 5.742 0.00000
## poly(...)3 0.43002 0.70944 0.606 0.54442
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3
## intercept 2.66001
## poly(...)1 5.13799 215.40198
## poly(...)2 -3.76904 -67.21056 65.15327
## poly(...)3 0.49366 -33.88439 -17.82144 23.5836
##
## coef Se
## Residual standard error: 0.64648 0.01339
#Quartic polynomial growth model (one class)
quartic_model <- hlme(
  fixed = important ~ poly(time_numeric, 4),
  random = ~ poly(time_numeric, 4),
  subject = "id",
  data = d_long
)
summary(quartic_model)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 4), random = ~poly(time_numeric,
## 4), subject = "id", data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 1
## Number of parameters: 21
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 91
## Convergence criteria: parameters= 1.4e-06
## : likelihood= 1.7e-05
## : second derivatives= 8.7e-05
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2769.12
## AIC: 5580.24
## BIC: 5657.45
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept 4.82779 0.09651 50.024 0.00000
## poly(...)1 -1.61465 1.07474 -1.502 0.13300
## poly(...)2 4.59727 0.79962 5.749 0.00000
## poly(...)3 0.43008 0.70316 0.612 0.54078
## poly(...)4 -1.13176 0.69547 -1.627 0.10367
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept 2.66451
## poly(...)1 5.14393 224.43951
## poly(...)2 -3.77516 -67.53643 73.85687
## poly(...)3 0.49284 -34.37230 -18.65112 31.43926
## poly(...)4 0.00078 25.37764 -25.98317 -4.83866 28.3847
##
## coef Se
## Residual standard error: 0.62163 0.01769
#To save outputs
#save(nl_model, file = 'nl_model.Rdata')
#save(cubic_model, file = 'cubic_model.Rdata')
#save(quartic_model, file = 'quartic_model.Rdata')
#load('nl_model.Rdata')
#load('cubic_model.Rdata')
#load('quartic_model.Rdata')
#Compare models (one-class quadratic vs cubic vs quartic) by BIC
summarytable(nl_model, cubic_model, quartic_model)
## G loglik npm BIC %class1
## nl_model 1 -2787.635 10 5632.037 100
## cubic_model 1 -2778.255 15 5641.661 100
## quartic_model 1 -2769.120 21 5657.452 100
#NOTE(review): this repeats the earlier summary(quartic_model) call and
#produces identical output; one of the two calls could be removed.
summary(quartic_model)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 4), random = ~poly(time_numeric,
## 4), subject = "id", data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 1
## Number of parameters: 21
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 91
## Convergence criteria: parameters= 1.4e-06
## : likelihood= 1.7e-05
## : second derivatives= 8.7e-05
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2769.12
## AIC: 5580.24
## BIC: 5657.45
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept 4.82779 0.09651 50.024 0.00000
## poly(...)1 -1.61465 1.07474 -1.502 0.13300
## poly(...)2 4.59727 0.79962 5.749 0.00000
## poly(...)3 0.43008 0.70316 0.612 0.54078
## poly(...)4 -1.13176 0.69547 -1.627 0.10367
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept 2.66451
## poly(...)1 5.14393 224.43951
## poly(...)2 -3.77516 -67.53643 73.85687
## poly(...)3 0.49284 -34.37230 -18.65112 31.43926
## poly(...)4 0.00078 25.37764 -25.98317 -4.83866 28.3847
##
## coef Se
## Residual standard error: 0.62163 0.01769
#Latent class nonlinear growth model with polynomial terms
set.seed(2002)
#Fits commented out (slow); results re-loaded from .Rdata files below.
#nl_lcga1 <- hlme(fixed = important ~ poly(time_numeric, 3), subject = "id", ng = 1, data = d_long)
#nl_lcga2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_lcga1,
# m = hlme(fixed = important ~ poly(time_numeric, 3),
# subject = "id", ng = 2, data = d_long,
# mixture = ~ poly(time_numeric, 3)))
#nl_lcga3 <- gridsearch(rep = 50, maxiter = 10, minit = nl_lcga1,
# m = hlme(fixed = important ~ poly(time_numeric, 3),
# subject = "id", ng = 3, data = d_long,
# mixture = ~ poly(time_numeric, 3)))
#To save outputs
#save(nl_lcga1, file = 'nl_lcga1.Rdata')
#save(nl_lcga2, file = 'nl_lcga2.Rdata')
#save(nl_lcga3, file = 'nl_lcga3.Rdata')
#Load the previously fitted models
load('nl_lcga1.Rdata')
load('nl_lcga2.Rdata')
load('nl_lcga3.Rdata')
#Compare models
summarytable(nl_lcga1, nl_lcga2, nl_lcga3)
## G loglik npm BIC %class1 %class2 %class3
## nl_lcga1 1 -4097.555 5 8223.493 100.00000
## nl_lcga2 2 -3236.616 10 6530.000 68.83562 31.16438
## nl_lcga3 3 -2993.315 15 6071.781 28.42466 47.60274 23.9726
#Detailed estimates for the 3-class nonlinear (cubic) LCGA
summary(nl_lcga3)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 3), mixture = ~poly(time_numeric,
## 3), subject = "id", ng = 3, data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 3
## Number of parameters: 15
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 1
## Convergence criteria: parameters= 2.7e-06
## : likelihood= 1.4e-06
## : second derivatives= 2.3e-09
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2993.31
## AIC: 6016.63
## BIC: 6071.78
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the class-membership model:
## (the class of reference is the last class)
##
## coef Se Wald p-value
## intercept class1 0.15115 0.17954 0.842 0.39988
## intercept class2 0.71649 0.18095 3.960 0.00008
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept class1 4.52114 0.12733 35.507 0.00000
## intercept class2 6.20454 0.04876 127.251 0.00000
## intercept class3 2.36593 0.08121 29.135 0.00000
## poly(...)1 class1 -0.46427 2.16701 -0.214 0.83036
## poly(...)1 class2 0.87565 1.36822 0.640 0.52218
## poly(...)1 class3 -8.05093 1.93781 -4.155 0.00003
## poly(...)2 class1 4.98746 1.80793 2.759 0.00580
## poly(...)2 class2 2.54575 1.32793 1.917 0.05523
## poly(...)2 class3 8.34346 1.94792 4.283 0.00002
## poly(...)3 class1 1.62255 1.83602 0.884 0.37684
## poly(...)3 class2 0.50938 1.32228 0.385 0.70007
## poly(...)3 class3 -1.11959 1.93842 -0.578 0.56355
##
## coef Se
## Residual standard error: 0.91109 0.01450
#GMM with nonlinear random effects Cubic. Random intercept for Importance
set.seed(2002)
#Fits commented out (slow); results re-loaded from .Rdata files below.
#nl_gmm1 <- hlme(important ~ poly(time_numeric, 3), subject = "id", random = ~1, ng = 1, data = d_long)
#nl_gmm2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1,
# hlme(important ~ poly(time_numeric, 3), subject = "id", random = ~1,
# ng = 2, data = d_long,
# mixture = ~ poly(time_numeric, 3), nwg = TRUE))
#nl_gmm3 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1,
# hlme(important ~ poly(time_numeric, 3), subject = "id",
# random = ~1, ng = 3, data = d_long,
# mixture = ~ poly(time_numeric, 3), nwg = TRUE))
#Save outputs
#save(nl_gmm1, file = 'nl_gmm1.Rdata')
#save(nl_gmm2, file = 'nl_gmm2.Rdata')
#save(nl_gmm3, file = 'nl_gmm3.Rdata')
#Load the previously fitted models
load('nl_gmm1.Rdata')
load('nl_gmm2.Rdata')
load('nl_gmm3.Rdata')
#Compare the models with diff numbers of clusters
summarytable(nl_gmm1, nl_gmm2, nl_gmm3)
## G loglik npm BIC %class1 %class2 %class3
## nl_gmm1 1 -2778.255 15 5641.661 100.00000
## nl_gmm2 2 -2694.605 21 5508.423 55.47945 44.520548
## nl_gmm3 3 -2676.508 27 5506.288 55.47945 4.794521 39.72603
#Fit GMM w/ Random intercept and Slopes for Importance
set.seed(2002)
#Fits commented out (slow); results re-loaded from .Rdata files below.
#NOTE(review): the cubic gridsearch calls below pass minit = nl_gmm1 (the
#random-intercept-only one-class model); presumably nl_gmm1_2 (the one-class
#random-slope model) was intended — verify before refitting.
#nl_gmm1_2 <- hlme(important ~ poly(time_numeric, 3), subject = "id", random = ~1 + poly(time_numeric, 3), ng = 1, data = d_long)
#nl_gmm2_2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1,
# hlme(important ~ poly(time_numeric, 3), subject = "id",
# random = ~1 + poly(time_numeric, 3),
# ng = 2, data = d_long,
# mixture = ~ poly(time_numeric, 3), nwg = TRUE))
#nl_gmm3_2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1,
# hlme(important ~ poly(time_numeric, 3), subject = "id",
# random = ~1 + poly(time_numeric, 3), ng = 3, data = d_long,
# mixture = ~ poly(time_numeric, 3), nwg = TRUE))
#Quartic model for GMM random slope/random intercept
#nl_gmm4_2 <- gridsearch(rep = 50, maxiter = 10, minit = quartic_model,
# hlme(important ~ poly(time_numeric, 4), subject = "id",
# random = ~1 + poly(time_numeric, 4), ng = 3,
# data = d_long,
# mixture = ~ poly(time_numeric, 4), nwg = TRUE))
#nl_gmm4_2a <- gridsearch(rep = 50, maxiter = 10, minit = quartic_model,
# hlme(important ~ poly(time_numeric, 4), subject = "id",
# random = ~1 + poly(time_numeric, 4), ng = 2,
# data = d_long,
# mixture = ~ poly(time_numeric, 4), nwg = TRUE))
#To save outputs
#save(nl_gmm1_2, file = 'nl_gmm1_2.Rdata')
#save(nl_gmm2_2, file = 'nl_gmm2_2.Rdata')
#save(nl_gmm3_2, file = 'nl_gmm3_2.Rdata')
#save(nl_gmm4_2, file = 'nl_gmm4_2.Rdata')
#save(nl_gmm4_2a, file = 'nl_gmm4_2a.Rdata')
#Load the previously fitted models
load('nl_gmm1_2.Rdata')
load('nl_gmm2_2.Rdata')
load('nl_gmm3_2.Rdata')
load('nl_gmm4_2.Rdata')
load('nl_gmm4_2a.Rdata')
#Compare the models with diff numbers of clusters
summarytable(nl_gmm1_2, nl_gmm2_2, nl_gmm3_2, nl_gmm4_2, nl_gmm4_2a)
## G loglik npm BIC %class1 %class2 %class3
## nl_gmm1_2 1 -2778.255 15 5641.661 100.00000
## nl_gmm2_2 2 -2694.605 21 5508.423 55.47945 44.520548
## nl_gmm4_2a 2 -2668.567 28 5496.083 54.45205 45.547945
## nl_gmm3_2 3 -2676.508 27 5506.288 55.47945 4.794521 39.72603
## nl_gmm4_2 3 -2644.492 35 5487.670 54.79452 5.136986 40.06849
#Detailed estimates for the 3-class quartic random-slope GMM (lowest BIC, 5487.67,
#in the comparison above)
summary(nl_gmm4_2)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 4), mixture = ~poly(time_numeric,
## 4), random = ~1 + poly(time_numeric, 4), subject = "id",
## ng = 3, nwg = TRUE, data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 3
## Number of parameters: 35
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 1
## Convergence criteria: parameters= 5.7e-06
## : likelihood= 1.9e-07
## : second derivatives= 1e-08
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2644.49
## AIC: 5358.98
## BIC: 5487.67
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the class-membership model:
## (the class of reference is the last class)
##
## coef Se Wald p-value
## intercept class1 0.19079 0.22496 0.848 0.39636
## intercept class2 -1.89618 0.39903 -4.752 0.00000
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept class1 6.06240 0.08533 71.050 0.00000
## intercept class2 3.80799 0.88291 4.313 0.00002
## intercept class3 3.48675 0.21962 15.876 0.00000
## poly(...)1 class1 0.14202 1.17294 0.121 0.90362
## poly(...)1 class2 3.33939 10.22462 0.327 0.74397
## poly(...)1 class3 -4.48520 1.88940 -2.374 0.01760
## poly(...)2 class1 2.37450 1.01521 2.339 0.01934
## poly(...)2 class2 10.72373 7.34014 1.461 0.14402
## poly(...)2 class3 6.36761 1.42746 4.461 0.00001
## poly(...)3 class1 1.02557 0.94443 1.086 0.27752
## poly(...)3 class2 3.77645 6.49285 0.582 0.56081
## poly(...)3 class3 -0.79298 1.36100 -0.583 0.56013
## poly(...)4 class1 0.59568 0.97924 0.608 0.54298
## poly(...)4 class2 -2.77164 5.74204 -0.483 0.62931
## poly(...)4 class3 -2.97614 1.39955 -2.126 0.03346
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept 1.80174
## poly(...)1 3.50138 200.75178
## poly(...)2 -1.57054 -47.39359 85.32228
## poly(...)3 -0.26943 -27.80353 -17.66082 58.38695
## poly(...)4 -3.59686 8.56736 -15.02654 -3.40329 50.79176
##
## coef Se
## Proportional coefficient class1 0.43556 0.05540
## Proportional coefficient class2 2.76164 0.35362
## Residual standard error: 0.56763 0.01320
#Detailed estimates for the 3-class cubic random-slope GMM
summary(nl_gmm3_2)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 3), mixture = ~poly(time_numeric,
## 3), random = ~1 + poly(time_numeric, 3), subject = "id",
## ng = 3, nwg = TRUE, data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 3
## Number of parameters: 27
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 4
## Convergence criteria: parameters= 1.6e-06
## : likelihood= 6.6e-09
## : second derivatives= 5.4e-14
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2676.51
## AIC: 5407.02
## BIC: 5506.29
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the class-membership model:
## (the class of reference is the last class)
##
## coef Se Wald p-value
## intercept class1 0.25073 0.23689 1.058 0.28986
## intercept class2 -1.93928 0.43506 -4.458 0.00001
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept class1 6.04779 0.09016 67.078 0.00000
## intercept class2 3.82640 0.90442 4.231 0.00002
## intercept class3 3.40414 0.23447 14.518 0.00000
## poly(...)1 class1 0.24464 1.17743 0.208 0.83541
## poly(...)1 class2 4.32167 10.41132 0.415 0.67807
## poly(...)1 class3 -4.85749 2.05679 -2.362 0.01819
## poly(...)2 class1 2.69501 1.04042 2.590 0.00959
## poly(...)2 class2 11.41774 9.48654 1.204 0.22876
## poly(...)2 class3 6.06080 1.62947 3.719 0.00020
## poly(...)3 class1 0.55181 1.00672 0.548 0.58361
## poly(...)3 class2 6.19991 7.25660 0.854 0.39289
## poly(...)3 class3 -0.55619 1.45656 -0.382 0.70257
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3
## intercept 1.65369
## poly(...)1 2.64439 184.25040
## poly(...)2 -1.91815 -49.59072 77.34515
## poly(...)3 0.91305 -27.68254 -17.17365 48.83857
##
## coef Se
## Proportional coefficient class1 0.45352 0.06566
## Proportional coefficient class2 2.82788 0.43949
## Residual standard error: 0.61158 0.01275
#Detailed estimates for the 2-class quartic random-slope GMM
summary(nl_gmm4_2a)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 4), mixture = ~poly(time_numeric,
## 4), random = ~1 + poly(time_numeric, 4), subject = "id",
## ng = 2, nwg = TRUE, data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 2
## Number of parameters: 28
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 1
## Convergence criteria: parameters= 1e-07
## : likelihood= 2.3e-08
## : second derivatives= 2.7e-10
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2668.57
## AIC: 5393.13
## BIC: 5496.08
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the class-membership model:
## (the class of reference is the last class)
##
## coef Se Wald p-value
## intercept class1 0.05299 0.16370 0.324 0.74617
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept class1 6.05673 0.07170 84.479 0.00000
## intercept class2 3.53197 0.16521 21.378 0.00000
## poly(...)1 class1 -0.02020 1.21610 -0.017 0.98675
## poly(...)1 class2 -3.29596 1.97320 -1.670 0.09485
## poly(...)2 class1 2.36139 0.97825 2.414 0.01578
## poly(...)2 class2 6.95486 1.40516 4.950 0.00000
## poly(...)3 class1 0.99862 0.93882 1.064 0.28746
## poly(...)3 class2 -0.16953 1.26011 -0.135 0.89298
## poly(...)4 class1 0.48238 0.96017 0.502 0.61539
## poly(...)4 class2 -2.83377 1.26370 -2.242 0.02493
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept 2.35263
## poly(...)1 6.06111 411.89928
## poly(...)2 -2.58216 -102.76881 168.21534
## poly(...)3 -0.17124 -54.48308 -25.76125 106.66945
## poly(...)4 -4.39797 35.85987 -34.51189 -11.62765 92.73458
##
## coef Se
## Proportional coefficient class1 0.36364 0.03122
## Residual standard error: 0.56840 0.01351
#NOTE(review): this repeats the earlier summary(nl_gmm4_2) call verbatim (the
#echoed output below is identical) -- consider removing one of the two
summary(nl_gmm4_2)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 4), mixture = ~poly(time_numeric,
## 4), random = ~1 + poly(time_numeric, 4), subject = "id",
## ng = 3, nwg = TRUE, data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 3
## Number of parameters: 35
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 1
## Convergence criteria: parameters= 5.7e-06
## : likelihood= 1.9e-07
## : second derivatives= 1e-08
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2644.49
## AIC: 5358.98
## BIC: 5487.67
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the class-membership model:
## (the class of reference is the last class)
##
## coef Se Wald p-value
## intercept class1 0.19079 0.22496 0.848 0.39636
## intercept class2 -1.89618 0.39903 -4.752 0.00000
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept class1 6.06240 0.08533 71.050 0.00000
## intercept class2 3.80799 0.88291 4.313 0.00002
## intercept class3 3.48675 0.21962 15.876 0.00000
## poly(...)1 class1 0.14202 1.17294 0.121 0.90362
## poly(...)1 class2 3.33939 10.22462 0.327 0.74397
## poly(...)1 class3 -4.48520 1.88940 -2.374 0.01760
## poly(...)2 class1 2.37450 1.01521 2.339 0.01934
## poly(...)2 class2 10.72373 7.34014 1.461 0.14402
## poly(...)2 class3 6.36761 1.42746 4.461 0.00001
## poly(...)3 class1 1.02557 0.94443 1.086 0.27752
## poly(...)3 class2 3.77645 6.49285 0.582 0.56081
## poly(...)3 class3 -0.79298 1.36100 -0.583 0.56013
## poly(...)4 class1 0.59568 0.97924 0.608 0.54298
## poly(...)4 class2 -2.77164 5.74204 -0.483 0.62931
## poly(...)4 class3 -2.97614 1.39955 -2.126 0.03346
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept 1.80174
## poly(...)1 3.50138 200.75178
## poly(...)2 -1.57054 -47.39359 85.32228
## poly(...)3 -0.26943 -27.80353 -17.66082 58.38695
## poly(...)4 -3.59686 8.56736 -15.02654 -3.40329 50.79176
##
## coef Se
## Proportional coefficient class1 0.43556 0.05540
## Proportional coefficient class2 2.76164 0.35362
## Residual standard error: 0.56763 0.01320
#Detailed estimates for the one-class quartic model (used as gridsearch minit
#for the quartic mixture fits above)
summary(quartic_model)
## Heterogenous linear mixed model
## fitted by maximum likelihood method
##
## hlme(fixed = important ~ poly(time_numeric, 4), random = ~poly(time_numeric,
## 4), subject = "id", data = d_long)
##
## Statistical Model:
## Dataset: d_long
## Number of subjects: 292
## Number of observations: 2044
## Number of latent classes: 1
## Number of parameters: 21
##
## Iteration process:
## Convergence criteria satisfied
## Number of iterations: 91
## Convergence criteria: parameters= 1.4e-06
## : likelihood= 1.7e-05
## : second derivatives= 8.7e-05
##
## Goodness-of-fit statistics:
## maximum log-likelihood: -2769.12
## AIC: 5580.24
## BIC: 5657.45
##
##
## Maximum Likelihood Estimates:
##
## Fixed effects in the longitudinal model:
##
## coef Se Wald p-value
## intercept 4.82779 0.09651 50.024 0.00000
## poly(...)1 -1.61465 1.07474 -1.502 0.13300
## poly(...)2 4.59727 0.79962 5.749 0.00000
## poly(...)3 0.43008 0.70316 0.612 0.54078
## poly(...)4 -1.13176 0.69547 -1.627 0.10367
##
##
## Variance-covariance matrix of the random-effects:
## intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept 2.66451
## poly(...)1 5.14393 224.43951
## poly(...)2 -3.77516 -67.53643 73.85687
## poly(...)3 0.49284 -34.37230 -18.65112 31.43926
## poly(...)4 0.00078 25.37764 -25.98317 -4.83866 28.3847
##
## coef Se
## Residual standard error: 0.62163 0.01769
#Predictions vs Observations for 3 classes quartic
#Posterior classification for the 3-class quartic GMM.
#Stored under a distinct name: the original assigned the result to `postprob`,
#which masked the lcmm::postprob() function itself for the rest of the session.
gmm4_2_postprob <- postprob(nl_gmm4_2)
##
## Posterior classification:
## class1 class2 class3
## N 160.00 15.00 117.00
## % 54.79 5.14 40.07
##
## Posterior classification table:
## --> mean of posterior probabilities in each class
## prob1 prob2 prob3
## class1 0.8997 0.0043 0.0960
## class2 0.0022 0.8845 0.1133
## class3 0.0490 0.0395 0.9115
##
## Posterior probabilities above a threshold (%):
## class1 class2 class3
## prob>0.7 89.38 86.67 89.74
## prob>0.8 80.00 66.67 86.32
## prob>0.9 67.50 66.67 73.50
##
#Lo-Mendell-Rubin likelihood ratio test (tidyLPA::calc_lrt)
#Arguments: n, then loglik / #params / #classes of the null (2-class, nl_gmm4_2a)
#model, then loglik / #params / #classes of the alternative (3-class, nl_gmm4_2).
#Values are transcribed by hand from the summaries above -- keep in sync if the
#models are refit.
#NOTE(review): n = 2044 is the number of observations; some applications use the
#number of subjects (292) instead -- confirm which is appropriate here.
calc_lrt(2044,-2668.57, 28, 2, -2644.49, 35, 3)
## Lo-Mendell-Rubin ad-hoc adjusted likelihood ratio rest:
##
## LR = 48.160, LMR LR (df = 7) = 46.142, p < 0.001
#For models fits
#Side-by-side fit indices for the 1-, 2-, and 3-class quartic models
summarytable(quartic_model, nl_gmm4_2, nl_gmm4_2a, which = c("G", "loglik", "conv", "npm", "AIC", "BIC", "SABIC", "entropy","ICL", "%class"))
## G loglik conv npm AIC BIC SABIC entropy
## quartic_model 1 -2769.120 1 21 5580.240 5657.452 5590.856 1.0000000
## nl_gmm4_2a 2 -2668.567 1 28 5393.134 5496.083 5407.288 0.7274587
## nl_gmm4_2 3 -2644.492 1 35 5358.984 5487.670 5376.677 0.7630735
## ICL1 ICL2 %class1 %class2 %class3
## quartic_model 5657.452 5657.452 100.00000
## nl_gmm4_2a 5551.245 5546.022 54.45205 45.547945
## nl_gmm4_2 5563.675 5553.602 54.79452 5.136986 40.06849
#Graphical comparison of BIC, entropy, and log-likelihood across the same models
summaryplot(quartic_model, nl_gmm4_2, nl_gmm4_2a, which = c("BIC", "entropy","loglik"))
#Plotting the best quartic GMM random slope/intercept. Importance. Predicted
#Plotting the model
#Diagnostics: residual plots and posterior-probability plot
plot(nl_gmm4_2, which = "residuals")
plot(nl_gmm4_2, which = "postprob")
plot(nl_gmm4_2a, which = "residuals")
#Time points in the study
time_points <- c("T1", "T2", "T3", "T4", "T5", "T6", "T7")
#Numeric values corresponding to time points for plotting
time_numeric <- 1:7
#Fixed-effect intercepts for classes 1-3 from the nl_gmm4_2 model summary
intercepts <- c(6.06240, 3.80799, 3.48675)
#Fixed-effect coefficients on the orthogonal polynomial terms from nl_gmm4_2:
#one row per polynomial degree (1-4), one column per class
polynomials <- matrix(
  c(0.14202, 3.33939, -4.48520,   #poly degree 1: class 1, 2, 3
    2.37450, 10.72373, 6.36761,   #poly degree 2: class 1, 2, 3
    1.02557, 3.77645, -0.79298,   #poly degree 3: class 1, 2, 3
    0.59568, -2.77164, -2.97614), #poly degree 4: class 1, 2, 3
  ncol = 3, byrow = TRUE)
#Orthogonal polynomial basis (degree 4) evaluated at the 7 time points.
#NOTE(review): this re-derives the basis from the 7 unique time values; the model
#was fitted with poly() over all 2044 rows of d_long, whose scaling can differ --
#verify against lcmm::predictY() for exact model-based trajectories.
poly_values <- poly(time_numeric, 4)
#Predicted value for each class at each time point:
#intercept + (7 x 4 basis) %*% (length-4 class coefficients) -> 7 values per class.
#(The previous sweep()/sum() version selected coefficients by ROW -- i.e. by
#polynomial degree rather than by class -- and then collapsed the whole basis
#matrix to a single scalar per class, which is what produced the
#"STATS does not recycle exactly across MARGIN" warnings and flat trajectories.)
predicted_values <- sapply(1:3, function(class) {
  intercepts[class] + as.vector(poly_values %*% polynomials[, class])
})
#Prepare data for plotting: one row per (time point, class); expand.grid varies
#Time fastest, which matches as.vector() column-major order on a 7 x 3
#prediction matrix (time within class)
df_plot <- expand.grid(Time = time_points, Class = c("Class 1", "Class 2", "Class 3"))
df_plot$Important <- as.vector(predicted_values)
#Clamp predictions to the 1-7 response scale (the original repeated this line twice)
df_plot$Important <- pmin(pmax(df_plot$Important, 1), 7)
#Plotting the predicted values w/ a quartic smoother.
#coord_cartesian() clips the view to 1-7 without dropping smoother points; the
#original set limits on the scale, which removed out-of-range rows with a warning.
ggplot(df_plot, aes(x = Time, y = Important, color = Class, group = Class)) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4), se = FALSE, size = 1.2, aes(group = Class)) +
  labs(title = "",
       x = "Time Point",
       y = "Predicted Importance Level") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous("Predicted Importance Level",
                     breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  coord_cartesian(ylim = c(1, 7)) +
  theme_classic() +
  theme(legend.position = "right",
        plot.title = element_text(size = 16),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14))
#Plotting quartic with 2 classes
#Time points in the study
time_points <- c("T1", "T2", "T3", "T4", "T5", "T6", "T7")
#Numeric values corresponding to time points for plotting
time_numeric <- 1:7
#Fixed-effect intercepts for classes 1-2 from the nl_gmm4_2a model summary
intercepts <- c(6.05673, 3.53197)
#Fixed-effect polynomial coefficients: one row per degree (1-4), one column per class
polynomials <- matrix(
  c(-0.02020, -3.29596,  #poly degree 1: class 1, 2
    2.36139, 6.95486,    #poly degree 2: class 1, 2
    0.99862, -0.16953,   #poly degree 3: class 1, 2
    0.48238, -2.83377),  #poly degree 4: class 1, 2
  nrow = 4, byrow = TRUE, ncol = 2)
#Orthogonal polynomial basis (degree 4) at the 7 time points.
#NOTE(review): refit on the 7 unique values; scaling may differ from the basis
#used on d_long when fitting -- verify against lcmm::predictY().
poly_values <- poly(time_numeric, 4)
#Predicted value for each class at each time point:
#intercept + basis %*% class coefficients -> 7 values per class.
#(The previous sum(sweep(...)) collapsed the basis matrix to one scalar per
#class, giving a flat line instead of a trajectory.)
predicted_values <- sapply(1:2, function(class) {
  intercepts[class] + as.vector(poly_values %*% polynomials[, class])
})
#Prepare data for plotting: Time varies fastest, matching as.vector()
#column-major order on a 7 x 2 prediction matrix
df_plot <- expand.grid(Time = time_points, Class = c("Class 1", "Class 2"))
df_plot$Important <- as.vector(predicted_values)
#Plotting the trajectories.
#Single y scale: the original called scale_y_continuous() twice, so ggplot
#discarded the first (data-range limits) with a "Scale for y is already present"
#message; only the 1-7 scale that took effect is kept here.
ggplot(df_plot, aes(x = Time, y = Important, color = Class, group = Class)) +
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  labs(title = "Predicted Trajectories of 'Important' by Class",
       x = "Time Point",
       y = "Predicted Importance Level") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous("Predicted Importance Level", limits = c(1, 7),
                     breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  theme_minimal()
#Smoothed
#Same 2-class trajectories with a quartic smoother through the predictions.
#As above, the duplicated scale_y_continuous() is collapsed into the single
#1-7 scale that actually took effect in the original.
ggplot(df_plot, aes(x = Time, y = Important, color = Class, group = Class)) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4), se = FALSE, size = 1.2, aes(group = Class)) +
  labs(title = "Predicted Trajectories of 'Important' by Class",
       x = "Time Point",
       y = "Predicted Importance Level") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous("Predicted Importance Level", limits = c(1, 7),
                     breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  theme_minimal()